Building a Map of Australia: Choropleth and Contiguous Cartogram

Author

C21380786

Published

April 17, 2025

# Function to quietly install and load packages
quiet_library <- function(pkg) {
  suppressMessages(suppressWarnings({
    if (!requireNamespace(pkg, quietly = TRUE)) {
      install.packages(pkg, quiet = TRUE)
    }
    library(pkg, character.only = TRUE)
  }))
}
options(repos = c(CRAN = "https://cloud.r-project.org"))

# Core libraries
quiet_library("ggplot2")
quiet_library("sf")
quiet_library("ggspatial")
quiet_library("rnaturalearth")
quiet_library("rnaturalearthdata")
quiet_library("geojsonio")
quiet_library("leaflet")
quiet_library("magick")
quiet_library("tidyverse")
quiet_library("viridisLite")
quiet_library("geosphere")
quiet_library("ggrepel")
quiet_library("cartogram")
quiet_library("rmapshaper")
quiet_library("tmap")


quiet_library("wordcloud")
quiet_library("RColorBrewer")
quiet_library("stopwords")
quiet_library("textdata")
quiet_library("tidytext")
quiet_library("stringr")
quiet_library("tidyr")
quiet_library("collapsibleTree")
quiet_library("text2vec")
quiet_library("dplyr")
quiet_library("widyr")
quiet_library("igraph")
quiet_library("ggraph")

Base Map of Australia

australia <- ne_countries(scale = "medium", country = "Australia", returnclass = "sf")

ggplot(data = australia) +
  geom_sf(fill = "antiquewhite") +
  theme_minimal() +
  ggtitle("Base Map of Australia")

Overlay Major Cities

cities <- data.frame(
  name = c("Sydney", "Melbourne", "Brisbane", "Perth", "Adelaide", "Darwin", "Canberra", "Hobart"),
  lon  = c(151.2093, 144.9631, 153.0251, 115.8575, 138.6007, 130.8456, 149.1300, 147.3272),
  lat  = c(-33.8688, -37.8136, -27.4698, -31.9505, -34.9285, -12.4634, -35.2809, -42.8821)
)

cities_sf <- st_as_sf(cities, coords = c("lon", "lat"), crs = 4326)

base_plot <- ggplot() +
  geom_sf(data = australia, fill = "antiquewhite") +
  geom_sf(data = cities_sf, color = "red", size = 3) +
  geom_text_repel(data = cities, aes(x = lon, y = lat, label = name), size = 3,
                  nudge_y = 1, nudge_x = 1) +
  theme_minimal() +
  ggtitle("Australia Map with Major Cities")
base_plot

Overlay State Boundaries

states <- st_read("gadm41_AUS_1.json", quiet = TRUE)

map_with_states <- base_plot +
  geom_sf(data = states, fill = NA, color = "blue", size = 0.7) +
  ggtitle("Australia Map with Cities and State Boundaries")
map_with_states

Overlay Route

# Coordinates for Sydney and Perth
sydney <- cities %>% filter(name == "Sydney")
perth  <- cities %>% filter(name == "Perth")

# Plot with route line
map_with_route <- ggplot() +
  geom_sf(data = states, fill = NA, color = "blue", size = 0.5) +
  geom_sf(data = australia, fill = "antiquewhite", color = "gray70") +

  # Cities
  geom_point(data = cities, aes(x = lon, y = lat), color = "red", size = 3) +
  geom_text_repel(data = cities, aes(x = lon, y = lat, label = name), vjust = -1) +

  # Curved route from Sydney to Perth
  geom_curve(aes(x = sydney$lon, y = sydney$lat,
                 xend = perth$lon, yend = perth$lat),
             curvature = 0.2, color = "purple", linewidth = 1,
             arrow = arrow(length = unit(0.15, "inches"))) +

  # Map scale and north arrow
  annotation_scale(location = "bl", width_hint = 0.5) +
  annotation_north_arrow(location = "tl", which_north = "true", 
                         style = north_arrow_fancy_orienteering) +

  # Theme and title
  theme_minimal() +
  ggtitle("Australia with Major Cities and Route: Sydney to Perth")

map_with_route
Scale on map varies by more than 10%, scale bar may be inaccurate

Add Median Income by State

# Read the CSV
income <- read.csv("medianincomeaustralia.csv")

# Join directly using matching NAME_1 and State columns
states_income <- left_join(states, income, by = c("NAME_1" = "State"))

# Plot the choropleth map
choropleth_map <- ggplot() +
  geom_sf(data = states_income, aes(fill = mdn_income), color = "black") +
  scale_fill_viridis_c(option = "magma", name = "Median Income") +
  geom_sf(data = australia, fill = NA, color = "gray50") +
  geom_sf(data = cities_sf, color = "red", size = 3) +
  geom_text_repel(data = cities, aes(x = lon, y = lat, label = name), size = 3) +
  theme_minimal() +
  ggtitle("Choropleth Map of Australia: Median Income (Aug 2024)")
choropleth_map

Annotate Map with Scale and Compass

choropleth_map_annotated <- choropleth_map +
  annotation_scale(location = "bl", width_hint = 0.5) +
  annotation_north_arrow(location = "bl", which_north = "true",
                         pad_x = unit(0.1, "in"), pad_y = unit(0.5, "in"),
                         style = north_arrow_fancy_orienteering)
choropleth_map_annotated
Scale on map varies by more than 10%, scale bar may be inaccurate

Reproject to EPSG:28355 (Australia Transverse Mercator)

australia_28355 <- st_transform(australia, crs = 28355)
states_income_28355 <- st_transform(states_income, crs = 28355)
cities_sf_28355 <- st_transform(cities_sf, crs = 28355)

reprojected_map <- ggplot() +
  geom_sf(data = australia_28355, fill = "antiquewhite") +
  geom_sf(data = states_income_28355, aes(fill = mdn_income), color = "black") +
  scale_fill_viridis_c(option = "magma") +
  geom_sf(data = cities_sf_28355, color = "red", size = 3) +
  geom_text_repel(data = as.data.frame(st_coordinates(cities_sf_28355)) %>% bind_cols(cities),
                  aes(x = X, y = Y, label = name), size = 3) +
  annotation_scale(location = "bl", width_hint = 0.5) +
  annotation_north_arrow(location = "bl", which_north = "true",
                         pad_x = unit(0.1, "in"), pad_y = unit(0.5, "in"),
                         style = north_arrow_fancy_orienteering) +
  theme_minimal() +
  ggtitle("Reprojected Choropleth Map of Australia (EPSG:28355)")
reprojected_map

Create Simplified State Shapes

states_simplified <- ms_simplify(states_income_28355, keep = 0.05, keep_shapes = TRUE)

Generate Cartogram

states_cartogram <- cartogram_cont(states_simplified, "mdn_income", itermax = 5)
Warning in cartogram_cont.sf(states_simplified, "mdn_income", itermax = 5): NA
not allowed in weight vector. Features will be removed from Shape.

Plot Cartogram Using tmap

tmap_mode("plot")
ℹ tmap mode set to "plot".
cartogram_map <- tm_shape(states_cartogram) +
  tm_polygons("mdn_income", palette = "Blues", style = "jenks", title = "Median Income") +
  tm_scale_bar(position = c("left", "bottom")) +
  tm_compass(position = c("right", "top"), type = "arrow") +
  tm_layout(title = "Contiguous Cartogram of Australia (Median Income)",
            inner.margins = c(0.1, 0.1, 0.1, 0.1))

── tmap v3 code detected ───────────────────────────────────────────────────────
[v3->v4] `tm_polygons()`: instead of `style = "jenks"`, use fill.scale =
`tm_scale_intervals()`.
ℹ Migrate the argument(s) 'style', 'palette' (rename to 'values') to
  'tm_scale_intervals(<HERE>)'[v3->v4] `tm_polygons()`: migrate the argument(s) related to the legend of the
visual variable `fill` namely 'title' to 'fill.legend = tm_legend(<HERE>)'! `tm_scale_bar()` is deprecated. Please use `tm_scalebar()` instead.[v3->v4] `tm_layout()`: use `tm_title()` instead of `tm_layout(title = )`
cartogram_map
[cols4all] color palettes: use palettes from the R package cols4all. Run
`cols4all::c4a_gui()` to explore them. The old palette name "Blues" is named
"brewer.blues"
Multiple palettes called "blues" found: "brewer.blues", "matplotlib.blues". The first one, "brewer.blues", is returned.

[plot mode] fit legend/component: Some legend items or map compoments do not
fit well, and are therefore rescaled.
ℹ Set the tmap option `component.autoscale = FALSE` to disable rescaling.

# Prepare spatial layers in WGS84 (required for leaflet)
states_income_leaflet <- st_transform(states_income, crs = 4326)
cities_sf_leaflet <- st_transform(cities_sf, crs = 4326)

# Extract lon/lat from city points
cities_coords <- cities_sf_leaflet %>%
  dplyr::mutate(
    lon = sf::st_coordinates(.)[, 1],
    lat = sf::st_coordinates(.)[, 2]
  ) %>%
  dplyr::select(name, lon, lat) %>%
  as.data.frame()

# Create leaflet map
leaflet_map <- leaflet() %>%
  addTiles(group = "Base Map") %>%

  # State boundaries
  addPolygons(data = states_income_leaflet,
              color = "blue", weight = 1, fill = FALSE,
              label = ~NAME_1,
              group = "State Boundaries") %>%

  # Income choropleth
  addPolygons(data = states_income_leaflet,
              fillColor = ~colorNumeric("YlOrRd", mdn_income)(mdn_income),
              fillOpacity = 0.7, weight = 1, color = "#444444",
              label = ~paste(NAME_1, "<br>Income:", round(mdn_income, 2)),
              highlightOptions = highlightOptions(weight = 2, color = "black", bringToFront = TRUE),
              group = "Income Choropleth") %>%

  # Major cities with labels
  addCircleMarkers(data = cities_coords,
                   lng = ~lon,
                   lat = ~lat,
                   radius = 5,
                   color = "red",
                   stroke = TRUE,
                   fillOpacity = 0.9,
                   label = ~name,
                   group = "Major Cities") %>%

  # Controls
  addLayersControl(
    overlayGroups = c("State Boundaries", "Income Choropleth", "Major Cities"),
    options = layersControlOptions(collapsed = FALSE)
  )

leaflet_map
sample_proclamation <- c(
  "we declare the right of the people of ireland",
  "we place the cause of the irish republic under the protection of god",
  "we ask the blessing of the nation and of humanity",
  "we resolve to pursue the happiness and prosperity of the nation",
  "we strike in full confidence of victory",
  "we proclaim the irish republic as a sovereign independent state",
  "we pledge our lives to the cause of freedom",
  "we call upon our fellow countrymen to support us",
  "we stand ready to defend our nation and its rights",
  "we honour the sacrifice of those who came before us"
)

# Convert to tibble
proclamation_df <- tibble::tibble(line = 1:10, text = sample_proclamation)
data("stop_words")

# Bigrams
bigrams <- proclamation_df %>%
  tidytext::unnest_tokens(bigram, text, token = "ngrams", n = 2) %>%
  tidyr::separate(bigram, c("word1", "word2"), sep = " ") %>%
  dplyr::filter(!word1 %in% stop_words$word,
                !word2 %in% stop_words$word) %>%
  tidyr::unite(bigram, word1, word2, sep = " ") %>%
  count(bigram, sort = TRUE)

# Trigrams
trigrams <- proclamation_df %>%
  tidytext::unnest_tokens(trigram, text, token = "ngrams", n = 3) %>%
  tidyr::separate(trigram, c("word1", "word2", "word3"), sep = " ") %>%
  dplyr::filter(!word1 %in% stop_words$word,
                !word2 %in% stop_words$word,
                !word3 %in% stop_words$word) %>%
  tidyr::unite(trigram, word1, word2, word3, sep = " ") %>%
  count(trigram, sort = TRUE)

# View
bigrams
# A tibble: 4 × 2
  bigram                    n
  <chr>                 <int>
1 irish republic            2
2 fellow countrymen         1
3 sovereign independent     1
4 stand ready               1
trigrams
# A tibble: 0 × 2
# ℹ 2 variables: trigram <chr>, n <int>
bow <- proclamation_df %>%
  tidytext::unnest_tokens(word, text) %>%
  dplyr::anti_join(stop_words, by = "word") %>%
  dplyr::count(word, sort = TRUE)

bow
# A tibble: 33 × 2
   word           n
   <chr>      <int>
 1 nation         3
 2 irish          2
 3 republic       2
 4 blessing       1
 5 call           1
 6 confidence     1
 7 countrymen     1
 8 declare        1
 9 defend         1
10 fellow         1
# ℹ 23 more rows
set.seed(123)
wordcloud::wordcloud(
  words = bow$word,
  freq = bow$n,
  min.freq = 1,
  max.words = 100,
  colors = RColorBrewer::brewer.pal(8, "Dark2")
)

co_occurrence <- proclamation_df %>%
  tidytext::unnest_tokens(word, text) %>%
  dplyr::anti_join(stop_words, by = "word") %>%
  pairwise_count(word, line, sort = TRUE, upper = FALSE)

co_occurrence %>%
  # filter(n >= 1) %>%
  igraph::graph_from_data_frame() %>%
  ggraph::ggraph(layout = "fr") +
  ggraph::geom_edge_link(aes(edge_alpha = n), show.legend = FALSE) +
  ggraph::geom_node_point(color = "lightblue", size = 5) +
  ggraph::geom_node_text(aes(label = name), repel = TRUE) +
  ggplot2::theme_void() +
  ggplot2::labs(title = "Semantic Network of Co-occurring Words")

tokenized <- text2vec::word_tokenizer(tolower(sample_proclamation))

# Remove stop words from each tokenized sentence
tokenized_clean <- lapply(tokenized, function(words) {
  words[!words %in% stop_words$word]
})

max_len <- max(lengths(tokenized_clean))

tree_data <- tokenized_clean %>%
  lapply(function(x) c(x, rep(NA, max_len - length(x)))) %>%
  do.call(rbind, .) %>%
  as.data.frame(stringsAsFactors = FALSE)

colnames(tree_data) <- paste0("word", 1:ncol(tree_data))

collapsibleTree::collapsibleTree(
  tree_data,
  hierarchy = colnames(tree_data),
  root = "proclamation",
  width = 800,
  height = 600,
  fontSize = 16,
  zoomable = TRUE
)